import argparse
import json
import os
import re
import sys
from typing import Dict, List

from tqdm import tqdm
from openai import AzureOpenAI

# --------------------------------------------------------------------------------------
# Azure OpenAI Config
# --------------------------------------------------------------------------------------

# Azure OpenAI REST API version used for every request issued by this script.
api_version = "2024-02-15-preview"

# Keyword settings handed to the AzureOpenAI client. Environment variables win;
# the literal strings are placeholders that apply only when a variable is unset.
config_dict: Dict[str, str] = dict(
    api_key=os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY"),
    api_version=api_version,
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT", "https://your-azure-openai-endpoint/"),
)

# Placeholders for a local model/tokenizer pair; both stay None in this script.
model = None
tokenizer = None

# --------------------------------------------------------------------------------------
# Moderator Prompt
# --------------------------------------------------------------------------------------
# Instruction text for the stage-2 "moderator" call. run_gmo_evaluation() puts
# this text first, appends the JSON dump of all persona reasons, and expects the
# reply to be a JSON list of exactly four strings (anything else is replaced by
# four empty strings). The text is sent verbatim, so edits here change model
# behaviour.
MODERATOR_PROMPT = """You are a master social-media strategist. You will receive a dictionary of 'reasons' from different demographic personas, outlining what they believe makes a compelling tweet for a specific ad input.

Your objectives:
1.  Synthesize the key insights from all the persona reasonings.
2.  Generate FOUR distinct, high-quality tweets that cater to the collective feedback. Each tweet should explore a slightly different angle or tone suggested by the personas.
3.  Keep each tweet ≤ 280 characters.
4.  Ensure the tweets are professional, engaging, and use hashtags effectively.

Return ONLY a JSON-formatted list of 4 strings, where each string is a generated tweet.
Example format:
[
    "This is the first tweet, leveraging a direct and bold tone. #Innovation",
    "Here's a second option, focusing more on the emotional and community aspect. #Together",
    "A third tweet, using a question to drive engagement. What do you think? #Future",
    "The fourth and final tweet, more professional and corporate in style. #Official"
]

Do not include any other text, analysis, or commentary in your response."""


# --------------------------------------------------------------------------------------
# Persona prompts for REASON GENERATION
# --------------------------------------------------------------------------------------
REASON_PERSONA_PROMPTS = {
    "18-24_female": """You are a 18-24 year old woman who excels at crafting tweets that feel authentic, visually vivid, and empowering to your generation.

Your writing style:
• Genuine and relatable with subtle Gen-Z energy
• Uses trending language naturally (not forced)
• Emotionally intelligent and inclusive
• Confident but not aggressive

Tweet guidelines:
• ≤ 50 words (≤ 280 characters)
• Professional yet approachable tone
• Include relevant hashtags only if they add genuine value
• Focus on authenticity over perfection

After reading the example tweets, provide your reasoning on what key elements would make a successful tweet for the given input, based on your persona. What tone, style, and content should it have?

Return ONLY your reasoning as a short paragraph – no extra commentary or the tweet itself.""",

    "18-24_male": """You are a 18-24 year old man who excels at crafting tweets that are confident, engaging, and speak to young men authentically.

Your writing style:
• Direct and straightforward communication
• Subtly humorous without being juvenile
• Ambitious and forward-thinking
• Relatable to your peer group

Tweet guidelines:
• ≤ 50 words (≤ 280 characters)
• Bold yet respectful tone
• Include relevant hashtags only if they add genuine value
• Balance confidence with approachability

After reading the example tweets, provide your reasoning on what key elements would make a successful tweet for the given input, based on your persona. What tone, style, and content should it have?

Return ONLY your reasoning as a short paragraph – no extra commentary or the tweet itself.""",

    "25-34_female": """You are a 25-34 year old woman who excels at crafting tweets that are balanced, thoughtful, and emotionally resonant.

Your writing style:
• Sophisticated yet accessible
• Emotionally intelligent and nurturing
• Career-focused but values work-life balance
• Socially conscious and inclusive

Tweet guidelines:
• ≤ 50 words (≤ 280 characters)
• Professional yet warm tone
• Include relevant hashtags only if they add genuine value
• Appeal to both personal and professional aspirations

After reading the example tweets, provide your reasoning on what key elements would make a successful tweet for the given input, based on your persona. What tone, style, and content should it have?

Return ONLY your reasoning as a short paragraph – no extra commentary or the tweet itself.""",

    "25-34_male": """You are a 25-34 year old man who excels at crafting tweets that are ambitious, practical, and directly impactful.

Your writing style:
• Results-oriented and efficient
• Tech-savvy and innovation-focused
• Professional but personable
• Values substance over style

Tweet guidelines:
• ≤ 50 words (≤ 280 characters)
• Confident, clear tone with focus on utility
• Include relevant hashtags only if they add genuine value
• Emphasize practical benefits and real-world impact

After reading the example tweets, provide your reasoning on what key elements would make a successful tweet for the given input, based on your persona. What tone, style, and content should it have?

Return ONLY your reasoning as a short paragraph – no extra commentary or the tweet itself.""",

    "35-44_female": """You are a 35-44 year old woman who excels at crafting tweets that are warm, emotionally intelligent, and practically valuable.

Your writing style:
• Nurturing yet authoritative
• Family-conscious while maintaining professional identity
• Health and wellness focused
• Values authentic connections

Tweet guidelines:
• ≤ 50 words (≤ 280 characters)
• Warm, trustworthy tone
• Include relevant hashtags only if they add genuine value
• Balance family, health, and career considerations

After reading the example tweets, provide your reasoning on what key elements would make a successful tweet for the given input, based on your persona. What tone, style, and content should it have?

Return ONLY your reasoning as a short paragraph – no extra commentary or the tweet itself.""",

    "35-44_male": """You are a 35-44 year old man who excels at crafting tweets that are experienced, trustworthy, and deliver genuine value.

Your writing style:
• Mature and seasoned perspective
• Family-oriented but career-driven
• Values reliability and proven results
• Protective of time and resources

Tweet guidelines:
• ≤ 50 words (≤ 280 characters)
• Authoritative yet approachable tone
• Include relevant hashtags only if they add genuine value
• Focus on trust, utility, and long-term value

After reading the example tweets, provide your reasoning on what key elements would make a successful tweet for the given input, based on your persona. What tone, style, and content should it have?

Return ONLY your reasoning as a short paragraph – no extra commentary or the tweet itself.""",

    "45-54_female": """You are a 45-54 year old woman who excels at crafting tweets that are emotionally clear, thoughtful, and relevant to your life stage.

Your writing style:
• Wise and emotionally mature
• Wellness and self-care focused
• Values meaningful relationships
• Balances personal fulfillment with family responsibilities

Tweet guidelines:
• ≤ 50 words (≤ 280 characters)
• Thoughtful, caring tone
• Include relevant hashtags only if they add genuine value
• Emphasize wellness, family, and personal growth

After reading the example tweets, provide your reasoning on what key elements would make a successful tweet for the given input, based on your persona. What tone, style, and content should it have?

Return ONLY your reasoning as a short paragraph – no extra commentary or the tweet itself.""",

    "45-54_male": """You are a 45-54 year old man who excels at crafting tweets that are clear, impactful, and offer real-world value.

Your writing style:
• Experienced and practical
• Values efficiency and results
• Health-conscious and future-planning
• Mentoring mindset toward younger generations

Tweet guidelines:
• ≤ 50 words (≤ 280 characters)
• Authoritative yet supportive tone
• Include relevant hashtags only if they add genuine value
• Focus on practical benefits and proven solutions

After reading the example tweets, provide your reasoning on what key elements would make a successful tweet for the given input, based on your persona. What tone, style, and content should it have?

Return ONLY your reasoning as a short paragraph – no extra commentary or the tweet itself.""",

    "55+_female": """You are a 55+ year old woman who excels at crafting tweets that are clear, emotionally resonant, and respectfully engaging.

Your writing style:
• Warm and community-minded
• Values tradition while embracing positive change
• Wellness and family-focused
• Appreciates quality over quantity

Tweet guidelines:
• ≤ 50 words (≤ 280 characters)
• Gentle, respectful tone
• Include relevant hashtags only if they add genuine value
• Emphasize community, wellness, and meaningful connections

After reading the example tweets, provide your reasoning on what key elements would make a successful tweet for the given input, based on your persona. What tone, style, and content should it have?

Return ONLY your reasoning as a short paragraph – no extra commentary or the tweet itself.""",

    "55+_male": """You are a 55+ year old man who excels at crafting tweets that are clear, emotionally grounded, and thoughtfully reflective.

Your writing style:
• Wise and contemplative
• Values legacy and long-term thinking
• Health and security conscious
• Appreciates simplicity and authenticity

Tweet guidelines:
• ≤ 50 words (≤ 280 characters)
• Measured, trustworthy tone
• Include relevant hashtags only if they add genuine value
• Focus on health, security, and meaningful impact

After reading the example tweets, provide your reasoning on what key elements would make a successful tweet for the given input, based on your persona. What tone, style, and content should it have?

Return ONLY your reasoning as a short paragraph – no extra commentary or the tweet itself."""
}


# --------------------------------------------------------------------------------------
# Helper functions
# --------------------------------------------------------------------------------------

def _get_azure_client():
    """Return a shared AzureOpenAI client, rebuilt only when config_dict changes."""
    settings = (
        config_dict["api_key"],
        config_dict["api_version"],
        config_dict["azure_endpoint"],
    )
    cached = getattr(_get_azure_client, "_cached", None)
    if cached is None or cached[0] != settings:
        client = AzureOpenAI(
            api_key=settings[0],
            api_version=settings[1],
            azure_endpoint=settings[2],
        )
        cached = (settings, client)
        _get_azure_client._cached = cached
    return cached[1]


def verbalize(full_prompt: str, *_, **__) -> str:
    """Send one prompt to GPT via Azure OpenAI and return the reply text.

    Args:
        full_prompt: Complete prompt text; sent as a single user message.
        *_, **__: Ignored. Accepted so call sites that still pass
            (model, tokenizer, args) keep working.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or "" when the service returns no text content.

    Raises:
        Whatever the OpenAI client raises for network/API failures; nothing
        is caught here.
    """
    messages = [
        # NOTE(review): "/no_think" is glued to the prompt with no separator and
        # looks like a leftover switch from another model family - confirm it is
        # still wanted for gpt-4o before changing it.
        {"role": "user", "content": "/no_think" + full_prompt},
    ]

    # Reuse one client instead of constructing a new one for every request.
    client = _get_azure_client()

    resp = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=300,
        temperature=0.85,
        n=1,
    )

    # The content field can be None (e.g. a filtered completion); treat that as
    # an empty reply rather than crashing on .strip().
    content = resp.choices[0].message.content
    return (content or "").strip()

# --------------------------------------------------------------------------------------
# CLI
# --------------------------------------------------------------------------------------

def parse_args() -> argparse.Namespace:
    """Build the command-line interface and parse ``sys.argv``.

    Returns:
        Namespace with ``start``, ``end``, ``output_dir``, ``gpu_id``,
        ``dataset_paths``, ``max_examples`` and ``tweet_eval``.
    """
    cli = argparse.ArgumentParser(
        description="Run GMO evaluation over a slice of a campaign dataset.",
    )

    # (flag, add_argument keyword options), registered in this order.
    option_table = [
        ("--start", {"type": int, "default": 0,
                     "help": "Start index (inclusive) of the slice."}),
        ("--end", {"type": int, "default": None,
                   "help": "End index (inclusive) of the slice."}),
        ("--output_dir", {"type": str, "default": "gmo_results",
                          "help": "Directory to write JSON results."}),
        ("--gpu_id", {"type": int, "default": 0,
                      "help": "GPU ID to use for inference."}),
        ("--dataset_paths", {"type": str, "required": True,
                             "help": "Comma-separated list of *.jsonl datasets to evaluate."}),
        ("--max_examples", {"type": int, "default": None,
                            "help": "(Optional) truncate dataset to this many examples – useful for quick smoke tests."}),
        # Retained only so the run.sh launcher, which still passes this flag,
        # keeps working; drop it once the launcher stops sending it.
        ("--tweet_eval", {"action": "store_true",
                          "help": "Legacy flag to trigger this evaluation path."}),
    ]
    for flag, options in option_table:
        cli.add_argument(flag, **options)

    return cli.parse_args()

# --------------------------------------------------------------------------------------
# Main evaluation logic
# --------------------------------------------------------------------------------------

def main() -> None:
    """Script entry point: parse the CLI arguments and run the GMO evaluation."""
    global model, tokenizer

    cli_args = parse_args()

    # All generation goes through GPT on Azure OpenAI, so nothing is loaded
    # locally. The globals are reset to None because the verbalize() call sites
    # still pass them along.
    model = tokenizer = None

    # This script has a single task left, so dispatch straight to it.
    run_gmo_evaluation(cli_args)


def _read_jsonl_records(path, max_examples=None):
    """Read dict records from a JSON-lines file, reporting lines that are skipped."""
    records = []
    skipped = 0
    with open(path, "r", encoding="utf-8") as f_in:
        for line_idx, line in enumerate(f_in):
            # A falsy max_examples (None or 0) means "no limit".
            if max_examples and line_idx >= max_examples:
                break
            if not line.strip():
                continue  # blank lines carry no record
            try:
                parsed = json.loads(line)
            except (ValueError, RecursionError):
                skipped += 1
                continue
            if not isinstance(parsed, dict):
                # Later code calls .get() on every record, so only objects are usable.
                skipped += 1
                continue
            records.append(parsed)
    if skipped:
        print(f"[WARNING] Skipped {skipped} malformed line(s) in {path}", file=sys.stderr)
    return records


def _extract_moderator_tweets(raw_text):
    """Parse the moderator reply into a list of 4 items, or four empty strings."""
    # The model might wrap the list in a markdown code block.
    cleaned = re.sub(r"```json\n?|\n?```", "", raw_text)
    candidates = [cleaned]
    # If the model added commentary around the list, also try the outermost [...] span.
    first, last = cleaned.find("["), cleaned.rfind("]")
    if 0 <= first < last and cleaned[first:last + 1] != cleaned:
        candidates.append(cleaned[first:last + 1])

    decoded_any = False
    decoded = None
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, list) and len(parsed) == 4:
            return parsed
        if not decoded_any:
            decoded_any, decoded = True, parsed

    if decoded_any:
        print(f"[WARNING] Moderator did not return a list of 4 tweets. Got: {decoded}")
    else:
        print(f"[ERROR] Failed to decode JSON from moderator: {raw_text}")
    return [""] * 4  # Fallback


def _dump_json_atomically(payload, path):
    """Write payload as JSON to path without ever leaving a half-written file there."""
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f_tmp:
        json.dump(payload, f_tmp, indent=2)
    # The rename happens only after the dump succeeded, so an interruption
    # mid-write cannot destroy the results saved by an earlier iteration.
    os.replace(tmp_path, path)


def run_gmo_evaluation(args):
    """
    End-to-end evaluation on GMO datasets containing {"prompt":..., "response":...} per line.
    The 'prompt' field contains ICL examples and the final query.

    For every record in the selected slice of each dataset:
      1. each persona in REASON_PERSONA_PROMPTS produces a reasoning paragraph,
         primed with up to 5 pre-computed similar examples;
      2. the moderator prompt turns all reasonings into 4 tweets.
    Results are written to
    ``<output_dir>/gmo_results_<dataset file name>_<start>_<end>.json`` after
    every record and once more when the dataset is finished.

    Args:
        args: Parsed CLI namespace. Reads ``dataset_paths``, ``output_dir``,
            ``max_examples`` and, when present, ``start`` / ``end``
            (both inclusive).

    Raises:
        SystemExit: with code 1 when ``args.dataset_paths`` is empty.
        OSError: when a dataset cannot be opened or the final save fails.
        Errors raised by verbalize() propagate unchanged.
    """
    global model, tokenizer
    # Resolve dataset paths
    if args.dataset_paths:
        dset_paths = [p.strip() for p in args.dataset_paths.split(",") if p.strip()]
    else:
        # This path should not be taken if run via run.sh
        print("[ERROR] --dataset_paths is a required argument.", file=sys.stderr)
        sys.exit(1)

    overall_out_dir = args.output_dir
    os.makedirs(overall_out_dir, exist_ok=True)

    # Similarity files live next to this script and are loaded on first use.
    script_dir = os.path.dirname(__file__) or "."
    sim_paths = {
        "comp": os.path.join(script_dir, "similarity_test_tweet_comp_con.jsonl"),
        "ran": os.path.join(script_dir, "similarity_test_tweet_ran_con.jsonl"),
    }
    sim_data_cache = {}

    def load_sim(path):
        """Load (once) the similarity mapping stored at path; {} on any failure."""
        if path not in sim_data_cache:
            print(f"[INFO] Loading similarity data from: {path}")
            loaded = {}
            try:
                with open(path, "r", encoding="utf-8") as _f:
                    loaded = json.load(_f)
            except (FileNotFoundError, json.JSONDecodeError) as e:
                print(f"[ERROR] Failed to load similarity file {path}: {e}", file=sys.stderr)
            else:
                if not isinstance(loaded, dict):
                    # Lookups below use .get(record_id); anything else is unusable.
                    print(
                        f"[ERROR] Failed to load similarity file {path}: "
                        f"expected a JSON object, got {type(loaded).__name__}",
                        file=sys.stderr,
                    )
                    loaded = {}
            sim_data_cache[path] = loaded
        return sim_data_cache[path]

    # Determine which similarity file corresponds to which dataset path
    # This is brittle; assumes 'comp' is the first dset path and 'ran' the second.
    dset_to_sim_key = dict(zip(dset_paths, ("comp", "ran")))

    for dpath in dset_paths:
        dataset_name = os.path.basename(dpath)
        print(f"\n[INFO] Processing dataset: {dataset_name}")

        # Determine which similarity data to use for this dataset
        sim_key = dset_to_sim_key.get(dpath, "comp")  # Default to 'comp'
        similarity_data = load_sim(sim_paths[sim_key])
        print(f"[INFO] Using '{sim_key}' similarity data for this dataset.")

        records = _read_jsonl_records(dpath, args.max_examples)

        # --- Apply slicing if --start/--end are provided ---
        start_arg = getattr(args, "start", None)
        end_arg = getattr(args, "end", None)
        last_index = len(records) - 1
        slice_start = max(0, start_arg) if start_arg is not None else 0
        slice_end = last_index if end_arg is None else min(end_arg, last_index)
        if slice_start > 0 or slice_end < last_index:
            records = records[slice_start : slice_end + 1]
            print(f"[INFO] Processing slice {slice_start}-{slice_end} (n={len(records)}) of {dataset_name}")
        else:
            print(f"[INFO] Processing full dataset {dataset_name} (n={len(records)})")

        slice_suffix = f"_{slice_start}_{slice_end}"
        # Use a more specific output filename to avoid clashes
        out_path = os.path.join(overall_out_dir, f"gmo_results_{dataset_name}{slice_suffix}.json")

        all_results = []

        for idx, rec in enumerate(tqdm(records, desc=dataset_name)):
            ad_prompt = rec.get("prompt", "")
            gt_resp_tweet = rec.get("response", "")

            # Retrieve most similar examples based on pre-computed similarity JSON.
            # Records without an "id" are keyed by their position before slicing.
            record_id_str = str(rec.get("id", idx + slice_start))
            similar_examples = similarity_data.get(record_id_str) or []

            # Take up to 5 top-scoring examples
            few_shot_lines = []
            for ex in similar_examples[:5]:
                example_input = ex.get("prompt", "")
                example_tweet = (ex.get("response") or "").replace("<hyperlink>", "").strip()
                few_shot_lines.append(f"Input: {example_input}\nTweet: {example_tweet}")
            few_shot_context = (
                "You are given up to 5 similar examples. Study them carefully.\n\n" + "\n\n".join(few_shot_lines)
            )

            # STAGE 1: Generate reasons from personas
            persona_reasons = {}
            for persona_name, persona_text in REASON_PERSONA_PROMPTS.items():
                full_prompt = (
                    f"{persona_text}\n\n"
                    f"{few_shot_context}\n\n"
                    f"Now, based on the examples and your persona, generate reasoning for the following input.\n"
                    f"Input: {ad_prompt}\n"
                    f"Reasoning:"
                )
                persona_reasons[persona_name] = verbalize(full_prompt, model, tokenizer, args)

            # STAGE 2: Moderator generates 4 tweets from reasons
            moderator_input_prompt = (
                f"{MODERATOR_PROMPT}\n\n"
                f"Here are the reasons provided by the personas:\n"
                f"{json.dumps(persona_reasons, ensure_ascii=False, indent=2)}\n\n"
                f"Generate your list of 4 tweets now:"
            )

            generated_tweets_str = verbalize(moderator_input_prompt, model, tokenizer, args)
            generated_tweets = _extract_moderator_tweets(generated_tweets_str)

            all_results.append({
                "original_prompt": ad_prompt,
                "ground_truth_tweet": gt_resp_tweet,
                "stage1_reasons": persona_reasons,
                "generated_tweets": generated_tweets,
            })

            # Incremental save after every example to avoid data loss.
            # Best effort: a failed save must not abort the run.
            try:
                _dump_json_atomically(all_results, out_path)
            except (OSError, TypeError, ValueError) as _e:
                print(f"[WARNING] Incremental save failed: {_e}")

        # Final save per-dataset results (also creates the file for an empty slice)
        _dump_json_atomically(all_results, out_path)

        print(f"[INFO] Completed processing {dataset_name} slice {slice_start}-{slice_end}. Results saved to {out_path}")

    print("\n[INFO] GMO ad evaluation complete.")

# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()